from sys import executable
print(executable)
/Users/taehoonkim/miniforge3/envs/th/bin/python
# utility
import os
from copy import deepcopy
import warnings
# data handling
import pandas as pd
import numpy as np
import missingno as msno
# plotting
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
# statistics
import scipy as sp
from scipy import stats
import pingouin as pg
# modeling
import sklearn
# plotting setting
from IPython.core.display import display, HTML
from IPython.display import Image
plt.style.use('seaborn') # plt.style.use('ggplot')
sns.set(font_scale=1.5)
plt.rc('font', family='AppleGothic') # For Windows
plt.rcParams['figure.figsize'] = [10, 5]
mpl.rcParams['axes.unicode_minus'] = False
%matplotlib inline
# ignore warnings
warnings.filterwarnings('ignore')
proj_dir=os.getcwd()
hcp_dir=(proj_dir + '/hcp_data')
os.listdir(hcp_dir)
['.DS_Store', 'shen_edge_info.csv', 'fc_wm_df.csv', 'fc_rest_df.csv', 'fc_lang_df.csv', 'fc_gamble_df.csv', 'fc_social_df.csv', 'behav_df.csv']
# behav data
behav_dat = pd.read_csv('%s/behav_df.csv' % (hcp_dir))
behav_dat = behav_dat[['Subject', 'PMAT24_A_CR', 'ListSort_Unadj', 'PicVocab_Unadj',
'ReadEng_Unadj', 'ProcSpeed_Unadj', 'PicSeq_Unadj',
'DDisc_AUC_40K']]
behav_dat.columns = ['sn', 'fi', 'wm', 'viq1', 'viq2', 'ps', 'lm', 'dd']
behav_dat.head(3)
| sn | fi | wm | viq1 | viq2 | ps | lm | dd | |
|---|---|---|---|---|---|---|---|---|
| 0 | 100206 | 20 | 112.89 | 119.89140 | 113.5460 | 138.72 | 125.07 | 0.050000 |
| 1 | 100610 | 23 | 117.39 | 140.81510 | 141.3166 | 111.11 | 109.04 | 0.868750 |
| 2 | 101006 | 11 | 93.90 | 95.42348 | 113.5374 | 90.59 | 84.68 | 0.783073 |
# fMRI data
fMRI_dat = pd.read_csv('%s/fc_wm_df.csv' % (hcp_dir))
fMRI_dat.rename(columns={'Unnamed: 0':'sn'}, inplace =True)
fMRI_dat.head(3)
| sn | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | ... | 35768 | 35769 | 35770 | 35771 | 35772 | 35773 | 35774 | 35775 | 35776 | 35777 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 100206 | 0.09445 | 0.20970 | 0.09945 | -0.08035 | 0.02295 | 0.06575 | 0.12480 | -0.01735 | 0.08415 | ... | -0.00625 | 0.02345 | 0.05515 | 0.05585 | -0.11020 | 0.05545 | 0.02955 | -0.00635 | 0.00265 | 0.03775 |
| 1 | 100610 | 0.00705 | 0.04235 | 0.03755 | -0.04520 | -0.01150 | 0.17820 | 0.18140 | 0.15245 | 0.08160 | ... | 0.04685 | 0.05340 | -0.02835 | 0.05240 | -0.07905 | 0.01390 | 0.17020 | 0.03995 | 0.00625 | 0.14390 |
| 2 | 101006 | -0.05285 | -0.00300 | 0.05110 | 0.05520 | 0.13180 | 0.14115 | 0.16645 | 0.04335 | 0.10420 | ... | 0.08555 | -0.03635 | -0.14520 | -0.07760 | -0.00390 | -0.11595 | -0.07515 | 0.02550 | -0.04360 | 0.08760 |
3 rows × 35779 columns
behav_dat.dtypes
sn int64 fi int64 wm float64 viq1 float64 viq2 float64 ps float64 lm float64 dd float64 dtype: object
fMRI_dat.dtypes
sn int64
0 float64
1 float64
2 float64
3 float64
...
35773 float64
35774 float64
35775 float64
35776 float64
35777 float64
Length: 35779, dtype: object
behav_dat.isnull().sum().sum(), fMRI_dat.isnull().sum().sum()
(0, 0)
df_target = fMRI_dat.copy()
df_label = behav_dat.copy()
fig, ax = plt.subplots(1,1,figsize=(12, 4))
msno.matrix(df=df_label, color=(0.63, 0.79, 0.95), ax = ax)
# msno.matrix(df=df_target, color=(0.54, 0.81, 0.94))
plt.tight_layout()
plt.show()
ax_list = []
t_col = ['sn', 'fi', 'wm', 'viq1', 'viq2', 'ps', 'lm', 'dd']
for i in range(2):
for j in range(4):
ax_list.append([int(i),int(j)])
fig, axes = plt.subplots(nrows= 2, ncols= 4, figsize=[12, 6])
for idx, name in enumerate(ax_list):
sns.histplot(data=df_label[('%s' % (t_col[idx]))], kde=True, ax=axes[name[0], name[1]])
plt.tight_layout()
plt.show()
df_target.shape
(337, 35779)
ax_list = []
t_col = ['0', '4471', '8943', '14000', '18000', '23000', '27000', '35770']
for i in range(2):
for j in range(4):
ax_list.append([int(i),int(j)])
fig, axes = plt.subplots(nrows= 2, ncols= 4, figsize=[12, 6])
for idx, name in enumerate(ax_list):
sns.kdeplot(data=df_target[('%s' % (t_col[idx]))], ax=axes[name[0], name[1]])
plt.tight_layout()
plt.show()
def outlier_check(df, method=1):
if method == 1: # Boxplot Method
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
IQR = Q3 - Q1
lower = Q1-(IQR*1.5)
upper = Q1+(IQR*1.5)
elif method == 2: # Normal Distribution Method
Mean = df.mean()
std = df.std()
lower = Mean-std*3
upper = Mean+std*3
out_mask = ((df < lower) | (df > upper))
out_info = pd.concat([pd.DataFrame(lower, columns=['lower']),
pd.DataFrame(upper, columns=['upper']),
pd.DataFrame(df.min(), columns=['min']),
pd.DataFrame(df.max(), columns=['max']),
pd.DataFrame(((df < lower) | (df > upper)).sum(), columns=['counts'])], axis=1)
return out_info, out_mask
ax_list = []
t_col = ['sn', 'fi', 'wm', 'viq1', 'viq2', 'ps', 'lm', 'dd']
for i in range(2):
for j in range(4):
ax_list.append([int(i),int(j)])
fig, ax = plt.subplots(nrows= 2, ncols= 4, figsize=[12, 6])
for idx, name in enumerate(ax_list):
sns.boxplot(x=df_label[('%s' % (t_col[idx]))], ax = ax[name[0], name[1]])
plt.tight_layout()
plt.show()
behav_info, behav_mask = outlier_check(df_label, 2) # 3sd method
behav_info
| lower | upper | min | max | counts | |
|---|---|---|---|---|---|
| sn | -431501.421111 | 1.158559e+06 | 100206.000000 | 994273.000000 | 0 |
| fi | 2.906306 | 3.111150e+01 | 6.000000 | 24.000000 | 0 |
| wm | 77.784480 | 1.438458e+02 | 80.790000 | 144.500000 | 1 |
| viq1 | 88.714911 | 1.446018e+02 | 94.571660 | 142.830100 | 0 |
| viq2 | 83.861033 | 1.492981e+02 | 84.200000 | 150.710000 | 2 |
| ps | 67.888879 | 1.609584e+02 | 60.090000 | 154.690000 | 1 |
| lm | 73.837417 | 1.517133e+02 | 82.370000 | 135.550000 | 0 |
| dd | -0.382529 | 1.379271e+00 | 0.015625 | 0.984375 | 0 |
fc_info, fc_mask = outlier_check(df_target, 2) # 3sd method
fc_info.counts.sum()
73730
fc_info.counts.sum()/(df_target.shape[0]*df_target.shape[1])
0.006114854601562858
sns.histplot(fc_info.counts, kde = True)
plt.show()
fig, axes = plt.subplots(1, 2, figsize=[15, 6])
g1 = sns.heatmap(sp.spatial.distance.squareform(df_target.iloc[0,1:]),
vmax=-1, vmin=+1,
square=True, cmap="vlag",
linecolor='white', ax = axes[0])
g1.set_title('sub1')
g2 = sns.heatmap(sp.spatial.distance.squareform(df_target.iloc[1,1:]),
vmax=-1, vmin=+1,
square=True, cmap="vlag",
linecolor='white', ax = axes[1])
g2.set_title('sub2')
plt.tight_layout()
plt.show()
subj_idx = list(behav_dat.sn.values)
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold, mutual_info_regression, f_regression
from sklearn.feature_selection import SelectKBest, SelectPercentile
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.cross_decomposition import PLSRegression
# fs_method = ['none', 'VarThr', 'InfGain', 'f-Reg']
fs_method = ['none', 'VarThr', 'f-Reg']
# 0 - none, 1 - variance Threshold, 2 - Information Gain, 3 - F-regression
fs_results = [];
x_fs = [];
x_tSn = [];
x_tSn_act = [];
x_tSn_r_pred = [];
x_nFeature = [];
for fs_idx, fs_name in enumerate(fs_method):
print("%d. method %s processing ----------------------------\n\n" % (fs_idx+1, fs_name))
for idx, this_sub in enumerate(subj_idx):
# subject index
train_subj = deepcopy(subj_idx)
train_subj.remove(this_sub)
test_subj = this_sub
# prepare data
X_train_r = df_target.loc[df_target['sn'] != test_subj, '0':]
y_train = df_label.loc[df_label['sn'] != test_subj, 'fi']
X_test_r = df_target.loc[df_target['sn'] == test_subj, '0':]
y_test = df_label.loc[df_label['sn'] == test_subj, 'fi']
# feature scaling
scaler = StandardScaler()
targ_col = X_train_r.columns.values
X_train_r = scaler.fit_transform(X_train_r)
X_train_r = pd.DataFrame(X_train_r)
X_train_r.columns = targ_col
X_test_r = scaler.transform(X_test_r)
X_test_r = pd.DataFrame(X_test_r)
X_test_r.columns = targ_col
# feature selection
if fs_name == 'none':
X_train_r_fs = np.array(X_train_r)
X_test_r_fs = np.array(X_test_r)
y_train_fs = np.array(y_train)
y_test_fs = np.array(y_test)
elif fs_name == 'VarThr': # Variance Threshold
Threshold = 0.2
selector_var=VarianceThreshold(threshold=Threshold)
selector_var.fit(X_train_r)
is_support_var = selector_var.variances_ > Threshold
X_train_r_fs = np.array(X_train_r[X_train_r.columns[is_support_var].values])
X_test_r_fs = np.array(X_test_r[X_test_r.columns[is_support_var].values])
y_train_fs = np.array(y_train)
y_test_fs = np.array(y_test)
elif fs_name == 'InfGain': # Information Gain
Threshold = 0.02
selector_ifg=SelectKBest(mutual_info_regression, k='all')
selector_ifg.fit(X_train_r, y_train)
is_support_ifg = selector_ifg.scores_ > Threshold
X_train_r_fs = np.array(X_train_r[X_train_r.columns[is_support_ifg].values])
X_test_r_fs = np.array(X_test_r[X_test_r.columns[is_support_ifg].values])
y_train_fs = np.array(y_train)
y_test_fs = np.array(y_test)
elif fs_name == 'f-Reg':
Threshold = 0.05
selector_freg=SelectKBest(f_regression, k='all')
selector_freg.fit(X_train_r, y_train)
is_support_freg = selector_freg.pvalues_ < Threshold
X_train_r_fs = np.array(X_train_r[X_train_r.columns[is_support_freg].values])
X_test_r_fs = np.array(X_test_r[X_test_r.columns[is_support_freg].values])
y_train_fs = np.array(y_train)
y_test_fs = np.array(y_test)
#model_r = Ridge()
model_r = Ridge()
model_r.fit(X_train_r_fs, y_train_fs)
y_pred_r = model_r.predict(X_test_r_fs)
x_fs.append(fs_name)
x_tSn.append(this_sub)
x_tSn_act.append(y_test_fs[0]);
x_tSn_r_pred.append(y_pred_r[0]);
x_nFeature.append(X_train_r_fs.shape[1])
print("%d. method %s done ----------------------------\n\n" % (fs_idx+1, fs_name))
fs_results = pd.DataFrame({'fs_method': x_fs,
'tSN': x_tSn,
'behav_act': x_tSn_act,
'behav_pred': x_tSn_r_pred,
'nFeature': x_nFeature})
1. method none processing ---------------------------- 1. method none done ---------------------------- 2. method VarThr processing ---------------------------- 2. method VarThr done ---------------------------- 3. method f-Reg processing ---------------------------- 3. method f-Reg done ----------------------------
fs_results
| fs_method | tSN | behav_act | behav_pred | nFeature | |
|---|---|---|---|---|---|
| 0 | none | 100206 | 20 | 18.725468 | 35778 |
| 1 | none | 100610 | 23 | 19.902389 | 35778 |
| 2 | none | 101006 | 11 | 15.176511 | 35778 |
| 3 | none | 101309 | 15 | 15.269803 | 35778 |
| 4 | none | 101915 | 21 | 18.415946 | 35778 |
| ... | ... | ... | ... | ... | ... |
| 1006 | f-Reg | 966975 | 14 | 14.221206 | 6245 |
| 1007 | f-Reg | 984472 | 22 | 20.210941 | 6201 |
| 1008 | f-Reg | 987983 | 22 | 20.596465 | 6172 |
| 1009 | f-Reg | 993675 | 21 | 20.229134 | 6160 |
| 1010 | f-Reg | 994273 | 20 | 16.056627 | 6215 |
1011 rows × 5 columns
fs_method = ['none', 'VarThr', 'f-Reg']
x_fs = [];
x_corr = [];
x_corr_p = [];
x_rmse = [];
x_mape = [];
x_nf = [];
for idx, name in enumerate(fs_method):
tmp_df= fs_results.loc[fs_results['fs_method'] == name, ['behav_act', 'behav_pred', 'nFeature']]
tmp_corr = pg.corr(tmp_df.behav_act, tmp_df.behav_pred)
rmse = mean_squared_error(tmp_df['behav_act'], tmp_df['behav_pred'])**0.5
mape = mean_absolute_percentage_error(tmp_df['behav_act'], tmp_df['behav_pred'])
x_fs.append(name)
x_corr.append(tmp_corr['r'][0])
x_corr_p.append(tmp_corr['p-val'][0])
x_rmse.append(rmse)
x_mape.append(mape)
x_nf.append(np.mean(tmp_df.nFeature))
fs_fnl = pd.DataFrame({'fs_method': x_fs,
'r' : x_corr,
'p_r' : x_corr_p,
'rmse' : x_rmse,
'mape' : x_mape,
'nF' : x_nf})
fs_fnl
| fs_method | r | p_r | rmse | mape | nF | |
|---|---|---|---|---|---|---|
| 0 | none | 0.497236 | 1.898063e-22 | 4.072995 | 0.242767 | 35778.000000 |
| 1 | VarThr | 0.497236 | 1.898063e-22 | 4.072995 | 0.242767 | 35778.000000 |
| 2 | f-Reg | 0.497236 | 1.898063e-22 | 4.165178 | 0.249607 | 6189.038576 |
fig, axes = plt.subplots(1,3, figsize = [15,5])
sns.barplot(data=fs_fnl, x='fs_method', y ='r', ax = axes[0])
sns.barplot(data=fs_fnl, x='fs_method', y ='rmse', ax = axes[1])
sns.barplot(data=fs_fnl, x='fs_method', y ='mape', ax = axes[2])
plt.tight_layout()
plt.show()
fig, axes = plt.subplots(1,3, figsize = [15,6])
for idx, name in enumerate(fs_method):
tmp_df= fs_results.loc[fs_results['fs_method'] == name, ['behav_act', 'behav_pred']]
sns.regplot(x='behav_act', y='behav_pred', data=tmp_df, ax = axes[idx]);
axes[idx].set_title(('%s' % name))
plt.tight_layout()
plt.show()
# import module
from sklearn.cross_decomposition import PLSRegression
from sklearn.linear_model import LinearRegression, BayesianRidge, Ridge, Lasso, BayesianRidge
from sklearn.svm import LinearSVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import KFold, LeaveOneOut, GridSearchCV
from sklearn.model_selection import cross_validate, cross_val_score, cross_val_predict
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
def optimise_pls_cv(X, y, n_comp):
# Define PLS object
pls = PLSRegression(n_components=n_comp)
# Cross-validation
y_cv = cross_val_predict(pls, X, y, cv=12)
# Calculate scores
r2 = r2_score(y, y_cv)
mse = mean_squared_error(y, y_cv)
rpd = y.std()/np.sqrt(mse)
return (y_cv, r2, mse, rpd)
def pick_nc(vals1, vals2, vals3, ylabel1, ylabel2, ylabel3, objective1, objective2, objective3):
if objective1 == 'min':
idx = np.argmin(vals1)
else:
idx = np.argmax(vals1)
if objective2 == 'min':
idx = np.argmin(vals2)
else:
idx = np.argmax(vals2)
if objective3=='min':
idx = np.argmin(vals3)
else:
idx = np.argmax(vals3)
nc = xticks[idx]
return nc
# Plot the mses
def plot_metrics(vals1, vals2, vals3, ylabel1, ylabel2, ylabel3, objective1, objective2, objective3):
with plt.style.context('ggplot'):
plt.subplot(1,3,1)
plt.plot(xticks, np.array(vals1), '-v', color='blue', mfc='blue')
if objective1=='min':
idx = np.argmin(vals1)
else:
idx = np.argmax(vals1)
plt.plot(xticks[idx], np.array(vals1)[idx], 'P', ms=10, mfc='red')
plt.xlabel('Number of PLS components')
plt.xticks = xticks
plt.ylabel(ylabel1)
plt.title('PLS - nComp %d' % xticks[idx])
plt.subplot(1,3,2)
plt.plot(xticks, np.array(vals2), '-v', color='blue', mfc='blue')
if objective2=='min':
idx = np.argmin(vals2)
else:
idx = np.argmax(vals2)
plt.plot(xticks[idx], np.array(vals2)[idx], 'P', ms=10, mfc='red')
plt.xlabel('Number of PLS components')
plt.xticks = xticks
plt.ylabel(ylabel2)
plt.title('PLS - nComp %d' % xticks[idx])
plt.subplot(1,3,3)
plt.plot(xticks, np.array(vals3), '-v', color='blue', mfc='blue')
if objective3=='min':
idx = np.argmin(vals3)
else:
idx = np.argmax(vals3)
plt.plot(xticks[idx], np.array(vals3)[idx], 'P', ms=10, mfc='red')
plt.xlabel('Number of PLS components')
plt.xticks = xticks
plt.ylabel(ylabel3)
plt.title('PLS - Comp %d' % xticks[idx])
nc = xticks[idx]
return nc
plt.show()
x_targ = np.array(df_target.loc[:, '0':])
y_targ = np.array(df_label.loc[:,'fi'])
# test with 20 components
r2s_r = []; mses_r = []; rpds_r = [];
xticks = np.arange(1, 20)
for n_comp in xticks:
y_cv, r2, mse, rpd = optimise_pls_cv(x_targ, y_targ, n_comp)
r2s_r.append(r2)
mses_r.append(mse)
rpds_r.append(rpd)
plt.figure(figsize=(15,5))
nc=plot_metrics(mses_r, rpds_r, r2s_r, 'MSE', 'RPD', 'R2', 'min', 'max', 'max')
plt.tight_layout()
plt.show()
print("\nRest-sFC", "/", "ncomp:", nc)
nc_r = nc
Rest-sFC / ncomp: 5
Pls = PLSRegression(n_components=nc_r)
Ridge = Ridge()
Lasso = Lasso()
SVR = LinearSVR()
# Linear = LinearRegression()
# bRdige = BayesianRidge()
# Tree = DecisionTreeRegressor()
Model_list = [Pls, Ridge, Lasso, SVR]
def simple_fit(model, X_train, y_train):
cv = KFold(n_splits = 10, random_state = 0, shuffle=True)
y_pred = cross_val_predict(model, X_train, y_train, cv=cv)
train_mse = mean_squared_error(y_train, y_pred)
train_rmse = mean_squared_error(y_train, y_pred)**0.5
train_mape = mean_absolute_percentage_error(y_train, y_pred)
train_r2 = r2_score(y_train, y_pred)
return y_pred.flatten(), train_mse, train_rmse, train_mape, train_r2
ms_df = pd.DataFrame(columns=[
'Model', 'MSE', 'RMSE', 'MAPE', 'R2', 'r', 'r_p'])
x_targ = np.array(df_target.loc[:, '0':])
y_targ = np.array(df_label.loc[:,'fi'])
i = 0
for model in Model_list:
i += 1
print(model)
y_pred, train_mse, train_rmse, train_mape, train_r2 = simple_fit(model, x_targ, y_targ)
tmp_corr = pg.corr(y_targ, y_pred)
ms_tmp = {'Model': str(model),
'MSE': train_mse,
'RMSE': train_rmse,
'MAPE': train_mape ,
'R2': train_r2 ,
'r': tmp_corr['r'][0],
'r_p': tmp_corr['p-val'][0]}
ms_df = ms_df.append(ms_tmp, ignore_index=True)
PLSRegression(n_components=5) Ridge() Lasso() LinearSVR()
ms_df
| Model | MSE | RMSE | MAPE | R2 | r | r_p | |
|---|---|---|---|---|---|---|---|
| 0 | PLSRegression(n_components=5) | 16.763407 | 4.094314 | 0.243158 | 0.239153 | 0.489318 | 1.086707e-21 |
| 1 | Ridge() | 17.535706 | 4.187566 | 0.247941 | 0.204100 | 0.462284 | 3.019919e-19 |
| 2 | Lasso() | 22.168176 | 4.708309 | 0.300326 | -0.006155 | -0.161368 | 2.970222e-03 |
| 3 | LinearSVR() | 18.228896 | 4.269531 | 0.250454 | 0.172638 | 0.454301 | 1.449408e-18 |
fig, axes = plt.subplots(4,1, figsize = [12,8])
sns.barplot(data=ms_df, y='Model', x ='r', ax = axes[0])
sns.barplot(data=ms_df, y='Model', x ='RMSE', ax = axes[1])
sns.barplot(data=ms_df, y='Model', x ='MAPE', ax = axes[2])
sns.barplot(data=ms_df, y='Model', x ='R2', ax = axes[3])
plt.tight_layout()
plt.show()
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold, mutual_info_regression, f_regression
from sklearn.feature_selection import SelectKBest, SelectPercentile
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.cross_decomposition import PLSRegression
subj_idx = list(behav_dat.sn.values)
#b_type = ['fi', 'wm', 'viq1', 'viq2', 'ps', 'lm', ' dd']
b_type = ['fi', 'wm', 'viq1', 'viq2', 'ps', 'lm', 'dd']
fs_method = ['none', 'f-Reg']
fs_results = [];
x_model = [];
x_model_comp = [];
x_btype = [];
x_fs = [];
x_tSn = [];
x_tSn_act = [];
x_tSn_r_pred = [];
x_nFeature = [];
for b_idx , b_name in enumerate(b_type):
for fs_idx, fs_name in enumerate(fs_method):
print("%d. %s, %s processing ----------------------------\n\n" % (fs_idx+1, b_name, fs_name))
for idx, this_sub in enumerate(subj_idx):
# subject index
train_subj = deepcopy(subj_idx)
train_subj.remove(this_sub)
test_subj = this_sub
# prepare data
X_train_r = df_target.loc[df_target['sn'] != test_subj, '0':]
y_train = df_label.loc[df_label['sn'] != test_subj, b_name]
X_test_r = df_target.loc[df_target['sn'] == test_subj, '0':]
y_test = df_label.loc[df_label['sn'] == test_subj, b_name]
# feature scaling
scaler = StandardScaler()
targ_col = X_train_r.columns.values
X_train_r = scaler.fit_transform(X_train_r)
X_train_r = pd.DataFrame(X_train_r)
X_train_r.columns = targ_col
X_test_r = scaler.transform(X_test_r)
X_test_r = pd.DataFrame(X_test_r)
X_test_r.columns = targ_col
# feature selection
if fs_name == 'none':
X_train_r_fs = np.array(X_train_r)
X_test_r_fs = np.array(X_test_r)
y_train_fs = np.array(y_train)
y_test_fs = np.array(y_test)
elif fs_name == 'f-Reg':
Threshold = 0.05
selector_freg=SelectKBest(f_regression, k='all')
selector_freg.fit(X_train_r, y_train)
is_support_freg = selector_freg.pvalues_ < Threshold
X_train_r_fs = np.array(X_train_r[X_train_r.columns[is_support_freg].values])
X_test_r_fs = np.array(X_test_r[X_test_r.columns[is_support_freg].values])
y_train_fs = np.array(y_train)
y_test_fs = np.array(y_test)
# PLSR optimization
r2s_r = []; mses_r = []; rpds_r = [];
xticks = np.arange(1, 10)
for n_comp in xticks:
y_cv, r2, mse, rpd = optimise_pls_cv(x_targ, y_targ, n_comp)
r2s_r.append(r2)
mses_r.append(mse)
rpds_r.append(rpd)
nc = pick_nc(mses_r, rpds_r, r2s_r, 'MSE', 'RPD', 'R2', 'min', 'max', 'max')
nc = 5
# Model Fitting & Test
model_r = PLSRegression(n_components=nc)
model_r.fit(X_train_r_fs, y_train_fs)
y_pred_r = model_r.predict(X_test_r_fs)
# saving results
x_model.append('PLSR')
x_model_comp.append(nc)
x_btype.append(b_name)
x_fs.append(fs_name)
x_tSn.append(this_sub)
x_tSn_act.append(y_test_fs[0]);
x_tSn_r_pred.append(y_pred_r[0]);
x_nFeature.append(X_train_r_fs.shape[1])
print("%d. %s, %s done ----------------------------\n\n" % (fs_idx+1, b_name, fs_name))
fs_results = pd.DataFrame({'model': x_model,
'mComp': x_model_comp,
'ability': x_btype,
'fs': x_fs,
'nFeature': x_nFeature,
'tSN': x_tSn,
'behav_act': x_tSn_act,
'behav_pred': x_tSn_r_pred})
1. fi, none processing ---------------------------- 1. fi, none done ---------------------------- 2. fi, f-Reg processing ---------------------------- 2. fi, f-Reg done ---------------------------- 1. wm, none processing ---------------------------- 1. wm, none done ---------------------------- 2. wm, f-Reg processing ---------------------------- 2. wm, f-Reg done ---------------------------- 1. viq1, none processing ---------------------------- 1. viq1, none done ---------------------------- 2. viq1, f-Reg processing ---------------------------- 2. viq1, f-Reg done ---------------------------- 1. viq2, none processing ---------------------------- 1. viq2, none done ---------------------------- 2. viq2, f-Reg processing ---------------------------- 2. viq2, f-Reg done ---------------------------- 1. ps, none processing ---------------------------- 1. ps, none done ---------------------------- 2. ps, f-Reg processing ---------------------------- 2. ps, f-Reg done ---------------------------- 1. lm, none processing ---------------------------- 1. lm, none done ---------------------------- 2. lm, f-Reg processing ---------------------------- 2. lm, f-Reg done ---------------------------- 1. dd, none processing ---------------------------- 1. dd, none done ---------------------------- 2. dd, f-Reg processing ---------------------------- 2. dd, f-Reg done ----------------------------
fs_results = pd.DataFrame({'model': x_model,
'mComp': x_model_comp,
'ability': x_btype,
'fs': x_fs,
'nFeature': x_nFeature,
'tSN': x_tSn,
'behav_act': x_tSn_act,
'behav_pred': x_tSn_r_pred})
fs_results.behav_pred = np.concatenate(fs_results.behav_pred)
fs_results.to_csv('hcp_plsr_allBehav.csv')
fs_results
| model | mComp | ability | fs | nFeature | tSN | behav_act | behav_pred | |
|---|---|---|---|---|---|---|---|---|
| 0 | PLSR | 5 | fi | none | 35778 | 100206 | 20.000000 | 18.815125 |
| 1 | PLSR | 5 | fi | none | 35778 | 100610 | 23.000000 | 19.961088 |
| 2 | PLSR | 5 | fi | none | 35778 | 101006 | 11.000000 | 15.335247 |
| 3 | PLSR | 5 | fi | none | 35778 | 101309 | 15.000000 | 15.364958 |
| 4 | PLSR | 5 | fi | none | 35778 | 101915 | 21.000000 | 18.361611 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4713 | PLSR | 5 | dd | f-Reg | 4191 | 966975 | 0.358203 | 0.289030 |
| 4714 | PLSR | 5 | dd | f-Reg | 4212 | 984472 | 0.760156 | 0.471012 |
| 4715 | PLSR | 5 | dd | f-Reg | 4194 | 987983 | 0.245703 | 0.484450 |
| 4716 | PLSR | 5 | dd | f-Reg | 4178 | 993675 | 0.938281 | 0.438770 |
| 4717 | PLSR | 5 | dd | f-Reg | 4194 | 994273 | 0.529427 | 0.670261 |
4718 rows × 8 columns
b_type = ['fi', 'wm', 'viq1', 'viq2', 'ps', 'lm', 'dd']
fs_method = ['none', 'f-Reg']
x_model = [];
x_nComp = [];
x_fs = [];
x_b = [];
x_corr = [];
x_corr_p = [];
x_rmse = [];
x_mape = [];
x_r2 = [];
x_nf = [];
for b_idx, b_name in enumerate(b_type):
tmp_df_raw = fs_results.loc[fs_results['ability'] == b_name,:]
for fs_idx, fs_name in enumerate(fs_method):
tmp_df = tmp_df_raw.loc[tmp_df_raw['fs'] == fs_name,
['model', 'mComp', 'fs', 'ability', 'behav_act', 'behav_pred', 'nFeature']].copy()
tmp_corr = pg.corr(tmp_df['behav_act'], tmp_df['behav_pred']).copy()
rmse = mean_squared_error(tmp_df['behav_act'], tmp_df['behav_pred'])**0.5
mape = mean_absolute_percentage_error(tmp_df['behav_act'], tmp_df['behav_pred'])
r2 = r2_score(tmp_df['behav_act'], tmp_df['behav_pred'])
x_model.append(tmp_df.iloc[0,0])
x_nComp.append(np.mean(tmp_df.iloc[:,1]))
x_b.append(b_name)
x_fs.append(fs_name)
x_corr.append(tmp_corr['r'][0])
x_corr_p.append(tmp_corr['p-val'][0])
x_rmse.append(rmse)
x_mape.append(mape)
x_r2.append(r2)
x_nf.append(np.mean(tmp_df.nFeature))
fs_fnl = pd.DataFrame({'model' : x_model,
'mComp' : x_nComp,
'ability': x_b,
'fs_method': x_fs,
'r' : x_corr,
'p_r' : x_corr_p,
'rmse' : x_rmse,
'mape' : x_mape,
'r2' : x_r2,
'nF' : x_nf})
fs_fnl
| model | mComp | ability | fs_method | r | p_r | rmse | mape | r2 | nF | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | PLSR | 5.0 | fi | none | 0.505279 | 3.076689e-23 | 4.051057 | 0.240350 | 0.255145 | 35778.000000 |
| 1 | PLSR | 5.0 | fi | f-Reg | 0.449262 | 3.820149e-18 | 4.198939 | 0.250485 | 0.199771 | 6189.038576 |
| 2 | PLSR | 5.0 | wm | none | 0.321662 | 1.499904e-09 | 10.563599 | 0.077487 | 0.076743 | 35778.000000 |
| 3 | PLSR | 5.0 | wm | f-Reg | 0.275815 | 2.680762e-07 | 10.796063 | 0.079738 | 0.035661 | 4629.029674 |
| 4 | PLSR | 5.0 | viq1 | none | 0.469098 | 7.660483e-20 | 8.221041 | 0.055509 | 0.218683 | 35778.000000 |
| 5 | PLSR | 5.0 | viq1 | f-Reg | 0.430214 | 1.292015e-16 | 8.424243 | 0.056750 | 0.179581 | 4674.623145 |
| 6 | PLSR | 5.0 | viq2 | none | 0.431262 | 1.070621e-16 | 9.843650 | 0.066463 | 0.182932 | 35778.000000 |
| 7 | PLSR | 5.0 | viq2 | f-Reg | 0.382201 | 3.648855e-13 | 10.139635 | 0.068291 | 0.133058 | 4646.296736 |
| 8 | PLSR | 5.0 | ps | none | 0.192993 | 3.662562e-04 | 15.571088 | 0.107523 | -0.010685 | 35778.000000 |
| 9 | PLSR | 5.0 | ps | f-Reg | 0.118187 | 3.006977e-02 | 16.224287 | 0.111643 | -0.097259 | 3139.664688 |
| 10 | PLSR | 5.0 | lm | none | 0.156225 | 4.041300e-03 | 13.192669 | 0.095821 | -0.036222 | 35778.000000 |
| 11 | PLSR | 5.0 | lm | f-Reg | 0.262775 | 9.970165e-07 | 12.749880 | 0.092030 | 0.032169 | 2665.094955 |
| 12 | PLSR | 5.0 | dd | none | 0.201989 | 1.893615e-04 | 0.294782 | 1.581147 | -0.010835 | 35778.000000 |
| 13 | PLSR | 5.0 | dd | f-Reg | 0.135244 | 1.295708e-02 | 0.304925 | 1.628199 | -0.081599 | 4186.928783 |
fig, axes = plt.subplots(4,1, figsize = [14,15])
g1 = sns.barplot(data=fs_fnl, y='ability', x ='r', hue = 'fs_method', ax = axes[0])
g1.legend(bbox_to_anchor=(1, 1))
g2 = sns.barplot(data=fs_fnl, y='ability', x ='rmse', hue = 'fs_method', ax = axes[1])
g2.legend(bbox_to_anchor=(1, 1))
g3 = sns.barplot(data=fs_fnl, y='ability', x ='mape', hue = 'fs_method', ax = axes[2])
g3.legend(bbox_to_anchor=(1, 1))
g4 = sns.barplot(data=fs_fnl, y='ability', x ='r2', hue = 'fs_method', ax = axes[3])
g4.legend(bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.show()
ax_list = [];
for i in range(2):
for j in range(4):
ax_list.append([int(i),int(j)])
fig, axes = plt.subplots(2,4, figsize = [12,6])
for idx, name in enumerate(b_type):
tmp_df = fs_results.loc[(fs_results['fs'] == 'none') & (fs_results['ability'] == name),
['behav_act', 'behav_pred']]
g=sns.regplot(x='behav_act', y='behav_pred', data=tmp_df, ax = axes[ax_list[idx][0], ax_list[idx][1]]);
axes[ax_list[idx][0], ax_list[idx][1]].set_title(('%s' % name))
plt.tight_layout()
plt.show()
edge_dat = pd.read_csv('%s/shen_edge_info.csv' % (hcp_dir))
net_list = ['Medial frontal', 'Fronto Parietal',
'Default Mode', 'Subcortical-cereb',
'Motor', 'Visual I', 'Visual II',
'Visual Association']
X_df = df_target.loc[:, '0':]
y_df = df_label.loc[:, 'fi':]
X_df = pd.concat([pd.DataFrame(edge_dat['net']),
X_df.T.reset_index(drop=True)], axis=1).T
X_df_1 = X_df.loc[0:, X_df.loc['net',:] == 1]
X_df_2 = X_df.loc[0:, X_df.loc['net',:] == 2]
X_df_3 = X_df.loc[0:, X_df.loc['net',:] == 3]
X_df_4 = X_df.loc[0:, X_df.loc['net',:] == 4]
X_df_5 = X_df.loc[0:, X_df.loc['net',:] == 5]
X_df_6 = X_df.loc[0:, X_df.loc['net',:] == 6]
X_df_7 = X_df.loc[0:, X_df.loc['net',:] == 7]
X_df_8 = X_df.loc[0:, X_df.loc['net',:] == 8]
b_list = y_df.columns.values
targ_b = b_list[0]
res = pd.concat([pg.corr(X_df_1.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_2.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_3.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_4.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_5.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_6.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_7.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_8.mean(axis=1), y_df[targ_b], method='skipped')]).T
res.columns = [net_list[0], net_list[1], net_list[2], net_list[3],
net_list[4], net_list[5], net_list[6], net_list[7]]
res.T
| n | outliers | r | CI95% | p-val | power | |
|---|---|---|---|---|---|---|
| Medial frontal | 337 | 0 | 0.222327 | [0.12, 0.32] | 0.000038 | 0.985292 |
| Fronto Parietal | 337 | 0 | 0.166537 | [0.06, 0.27] | 0.00216 | 0.867863 |
| Default Mode | 337 | 0 | 0.113654 | [0.01, 0.22] | 0.037032 | 0.55128 |
| Subcortical-cereb | 337 | 0 | 0.194986 | [0.09, 0.3] | 0.000317 | 0.951002 |
| Motor | 337 | 0 | 0.175335 | [0.07, 0.28] | 0.00123 | 0.900115 |
| Visual I | 337 | 0 | 0.284552 | [0.18, 0.38] | 0.0 | 0.999657 |
| Visual II | 337 | 0 | 0.009518 | [-0.1, 0.12] | 0.861805 | 0.053434 |
| Visual Association | 337 | 0 | 0.058013 | [-0.05, 0.16] | 0.288275 | 0.186009 |
ax_list = [];
for i in range(2):
for j in range(4):
ax_list.append([int(i),int(j)])
fig, axes = plt.subplots(2,4, figsize = [12,6])
for idx, name in enumerate(net_list):
tmp_x = eval(('X_df_%d' % (idx+1))).copy()
tmp_df = pd.concat([tmp_x.mean(axis=1), y_df[targ_b]], axis = 1)
tmp_df.columns = ['sFC', targ_b]
g=sns.regplot(x='sFC', y=targ_b, data=tmp_df, ax = axes[ax_list[idx][0], ax_list[idx][1]]);
axes[ax_list[idx][0], ax_list[idx][1]].set_title(('%s' % name))
plt.tight_layout()
plt.show()
targ_b = b_list[1]
res = pd.concat([pg.corr(X_df_1.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_2.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_3.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_4.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_5.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_6.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_7.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_8.mean(axis=1), y_df[targ_b], method='skipped')]).T
res.columns = [net_list[0], net_list[1], net_list[2], net_list[3],
net_list[4], net_list[5], net_list[6], net_list[7]]
res.T
| n | outliers | r | CI95% | p-val | power | |
|---|---|---|---|---|---|---|
| Medial frontal | 337 | 0 | 0.166834 | [0.06, 0.27] | 0.00212 | 0.869057 |
| Fronto Parietal | 337 | 0 | 0.202749 | [0.1, 0.3] | 0.000179 | 0.964273 |
| Default Mode | 337 | 0 | 0.046052 | [-0.06, 0.15] | 0.399393 | 0.134545 |
| Subcortical-cereb | 337 | 0 | 0.120248 | [0.01, 0.22] | 0.027295 | 0.599187 |
| Motor | 337 | 0 | 0.198117 | [0.09, 0.3] | 0.000252 | 0.956759 |
| Visual I | 337 | 0 | 0.179025 | [0.07, 0.28] | 0.000964 | 0.911802 |
| Visual II | 337 | 0 | 0.047177 | [-0.06, 0.15] | 0.387967 | 0.138849 |
| Visual Association | 337 | 0 | -0.012165 | [-0.12, 0.09] | 0.823928 | 0.055648 |
ax_list = [];
for i in range(2):
for j in range(4):
ax_list.append([int(i),int(j)])
fig, axes = plt.subplots(2,4, figsize = [12,6])
for idx, name in enumerate(net_list):
tmp_x = eval(('X_df_%d' % (idx+1))).copy()
tmp_df = pd.concat([tmp_x.mean(axis=1), y_df[targ_b]], axis = 1)
tmp_df.columns = ['sFC', targ_b]
g=sns.regplot(x='sFC', y=targ_b, data=tmp_df, ax = axes[ax_list[idx][0], ax_list[idx][1]]);
axes[ax_list[idx][0], ax_list[idx][1]].set_title(('%s' % name))
plt.tight_layout()
plt.show()
targ_b = b_list[2]
res = pd.concat([pg.corr(X_df_1.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_2.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_3.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_4.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_5.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_6.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_7.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_8.mean(axis=1), y_df[targ_b], method='skipped')]).T
res.columns = [net_list[0], net_list[1], net_list[2], net_list[3],
net_list[4], net_list[5], net_list[6], net_list[7]]
res.T
| n | outliers | r | CI95% | p-val | power | |
|---|---|---|---|---|---|---|
| Medial frontal | 337 | 0 | 0.036903 | [-0.07, 0.14] | 0.49957 | 0.103654 |
| Fronto Parietal | 337 | 1 | 0.026135 | [-0.08, 0.13] | 0.633104 | 0.076469 |
| Default Mode | 337 | 1 | -0.001856 | [-0.11, 0.11] | 0.972966 | 0.050084 |
| Subcortical-cereb | 337 | 0 | 0.102683 | [-0.0, 0.21] | 0.059701 | 0.470433 |
| Motor | 337 | 1 | 0.030647 | [-0.08, 0.14] | 0.575609 | 0.086598 |
| Visual I | 337 | 1 | 0.17799 | [0.07, 0.28] | 0.00105 | 0.907821 |
| Visual II | 337 | 1 | -0.036262 | [-0.14, 0.07] | 0.507698 | 0.101603 |
| Visual Association | 337 | 1 | 0.132537 | [0.03, 0.24] | 0.015052 | 0.683014 |
ax_list = [];
for i in range(2):
for j in range(4):
ax_list.append([int(i),int(j)])
fig, axes = plt.subplots(2,4, figsize = [12,6])
for idx, name in enumerate(net_list):
tmp_x = eval(('X_df_%d' % (idx+1))).copy()
tmp_df = pd.concat([tmp_x.mean(axis=1), y_df[targ_b]], axis = 1)
tmp_df.columns = ['sFC', targ_b]
g=sns.regplot(x='sFC', y=targ_b, data=tmp_df, ax = axes[ax_list[idx][0], ax_list[idx][1]]);
axes[ax_list[idx][0], ax_list[idx][1]].set_title(('%s' % name))
plt.tight_layout()
plt.show()
targ_b = b_list[3]
res = pd.concat([pg.corr(X_df_1.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_2.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_3.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_4.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_5.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_6.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_7.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_8.mean(axis=1), y_df[targ_b], method='skipped')]).T
res.columns = [net_list[0], net_list[1], net_list[2], net_list[3],
net_list[4], net_list[5], net_list[6], net_list[7]]
res.T
| n | outliers | r | CI95% | p-val | power | |
|---|---|---|---|---|---|---|
| Medial frontal | 337 | 5 | 0.103018 | [-0.0, 0.21] | 0.060795 | 0.467253 |
| Fronto Parietal | 337 | 6 | 0.044381 | [-0.06, 0.15] | 0.420946 | 0.126912 |
| Default Mode | 337 | 6 | 0.013182 | [-0.09, 0.12] | 0.811156 | 0.056523 |
| Subcortical-cereb | 337 | 5 | 0.098195 | [-0.01, 0.2] | 0.073975 | 0.432265 |
| Motor | 337 | 5 | 0.11352 | [0.01, 0.22] | 0.038704 | 0.544106 |
| Visual I | 337 | 6 | 0.166918 | [0.06, 0.27] | 0.002313 | 0.863411 |
| Visual II | 337 | 6 | -0.072465 | [-0.18, 0.04] | 0.188469 | 0.260411 |
| Visual Association | 337 | 5 | 0.055974 | [-0.05, 0.16] | 0.309227 | 0.174416 |
ax_list = [];
for i in range(2):
for j in range(4):
ax_list.append([int(i),int(j)])
fig, axes = plt.subplots(2,4, figsize = [12,6])
for idx, name in enumerate(net_list):
tmp_x = eval(('X_df_%d' % (idx+1))).copy()
tmp_df = pd.concat([tmp_x.mean(axis=1), y_df[targ_b]], axis = 1)
tmp_df.columns = ['sFC', targ_b]
g=sns.regplot(x='sFC', y=targ_b, data=tmp_df, ax = axes[ax_list[idx][0], ax_list[idx][1]]);
axes[ax_list[idx][0], ax_list[idx][1]].set_title(('%s' % name))
plt.tight_layout()
plt.show()
targ_b = b_list[4]
res = pd.concat([pg.corr(X_df_1.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_2.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_3.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_4.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_5.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_6.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_7.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_8.mean(axis=1), y_df[targ_b], method='skipped')]).T
res.columns = [net_list[0], net_list[1], net_list[2], net_list[3],
net_list[4], net_list[5], net_list[6], net_list[7]]
res.T
| n | outliers | r | CI95% | p-val | power | |
|---|---|---|---|---|---|---|
| Medial frontal | 337 | 1 | 0.130074 | [0.02, 0.23] | 0.017054 | 0.666525 |
| Fronto Parietal | 337 | 1 | 0.192494 | [0.09, 0.29] | 0.000387 | 0.945416 |
| Default Mode | 337 | 1 | 0.026086 | [-0.08, 0.13] | 0.63374 | 0.076368 |
| Subcortical-cereb | 337 | 1 | 0.044225 | [-0.06, 0.15] | 0.419068 | 0.127552 |
| Motor | 337 | 1 | 0.101278 | [-0.01, 0.21] | 0.063696 | 0.459005 |
| Visual I | 337 | 1 | 0.102263 | [-0.0, 0.21] | 0.061146 | 0.466227 |
| Visual II | 337 | 5 | 0.100836 | [-0.01, 0.21] | 0.066495 | 0.451373 |
| Visual Association | 337 | 1 | 0.036772 | [-0.07, 0.14] | 0.501735 | 0.103102 |
ax_list = [];
for i in range(2):
for j in range(4):
ax_list.append([int(i),int(j)])
fig, axes = plt.subplots(2,4, figsize = [12,6])
for idx, name in enumerate(net_list):
tmp_x = eval(('X_df_%d' % (idx+1))).copy()
tmp_df = pd.concat([tmp_x.mean(axis=1), y_df[targ_b]], axis = 1)
tmp_df.columns = ['sFC', targ_b]
g=sns.regplot(x='sFC', y=targ_b, data=tmp_df, ax = axes[ax_list[idx][0], ax_list[idx][1]]);
axes[ax_list[idx][0], ax_list[idx][1]].set_title(('%s' % name))
plt.tight_layout()
plt.show()
targ_b = b_list[5]
res = pd.concat([pg.corr(X_df_1.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_2.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_3.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_4.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_5.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_6.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_7.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_8.mean(axis=1), y_df[targ_b], method='skipped')]).T
res.columns = [net_list[0], net_list[1], net_list[2], net_list[3],
net_list[4], net_list[5], net_list[6], net_list[7]]
res.T
| n | outliers | r | CI95% | p-val | power | |
|---|---|---|---|---|---|---|
| Medial frontal | 337 | 0 | 0.043999 | [-0.06, 0.15] | 0.420759 | 0.126975 |
| Fronto Parietal | 337 | 0 | 0.012975 | [-0.09, 0.12] | 0.812418 | 0.056434 |
| Default Mode | 337 | 0 | 0.031307 | [-0.08, 0.14] | 0.566827 | 0.08834 |
| Subcortical-cereb | 337 | 0 | 0.129062 | [0.02, 0.23] | 0.01777 | 0.660956 |
| Motor | 337 | 0 | 0.113512 | [0.01, 0.22] | 0.03727 | 0.55024 |
| Visual I | 337 | 0 | 0.133082 | [0.03, 0.24] | 0.014491 | 0.687908 |
| Visual II | 337 | 0 | 0.017756 | [-0.09, 0.12] | 0.745351 | 0.062133 |
| Visual Association | 337 | 0 | 0.031031 | [-0.08, 0.14] | 0.57026 | 0.087653 |
ax_list = [];
for i in range(2):
for j in range(4):
ax_list.append([int(i),int(j)])
fig, axes = plt.subplots(2,4, figsize = [12,6])
for idx, name in enumerate(net_list):
tmp_x = eval(('X_df_%d' % (idx+1))).copy()
tmp_df = pd.concat([tmp_x.mean(axis=1), y_df[targ_b]], axis = 1)
tmp_df.columns = ['sFC', targ_b]
g=sns.regplot(x='sFC', y=targ_b, data=tmp_df, ax = axes[ax_list[idx][0], ax_list[idx][1]]);
axes[ax_list[idx][0], ax_list[idx][1]].set_title(('%s' % name))
plt.tight_layout()
plt.show()
targ_b = b_list[6]
res = pd.concat([pg.corr(X_df_1.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_2.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_3.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_4.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_5.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_6.mean(axis=1), y_df[targ_b], method='skipped'),
pg.corr(X_df_7.mean(axis=1), y_df[targ_b], method='skipped'), pg.corr(X_df_8.mean(axis=1), y_df[targ_b], method='skipped')]).T
res.columns = [net_list[0], net_list[1], net_list[2], net_list[3],
net_list[4], net_list[5], net_list[6], net_list[7]]
res.T
| n | outliers | r | CI95% | p-val | power | |
|---|---|---|---|---|---|---|
| Medial frontal | 337 | 1 | 0.181281 | [0.08, 0.28] | 0.000843 | 0.917677 |
| Fronto Parietal | 337 | 5 | -0.026654 | [-0.13, 0.08] | 0.628441 | 0.077212 |
| Default Mode | 337 | 4 | 0.116955 | [0.01, 0.22] | 0.032882 | 0.570337 |
| Subcortical-cereb | 337 | 0 | 0.076596 | [-0.03, 0.18] | 0.16063 | 0.289592 |
| Motor | 337 | 8 | 0.042956 | [-0.07, 0.15] | 0.437423 | 0.121474 |
| Visual I | 337 | 2 | 0.17713 | [0.07, 0.28] | 0.001131 | 0.90428 |
| Visual II | 337 | 5 | 0.002841 | [-0.1, 0.11] | 0.958869 | 0.050257 |
| Visual Association | 337 | 2 | 0.028842 | [-0.08, 0.14] | 0.598865 | 0.082243 |
ax_list = [];
for i in range(2):
for j in range(4):
ax_list.append([int(i),int(j)])
fig, axes = plt.subplots(2,4, figsize = [12,6])
for idx, name in enumerate(net_list):
tmp_x = eval(('X_df_%d' % (idx+1))).copy()
tmp_df = pd.concat([tmp_x.mean(axis=1), y_df[targ_b]], axis = 1)
tmp_df.columns = ['sFC', targ_b]
g=sns.regplot(x='sFC', y=targ_b, data=tmp_df, ax = axes[ax_list[idx][0], ax_list[idx][1]]);
axes[ax_list[idx][0], ax_list[idx][1]].set_title(('%s' % name))
plt.tight_layout()
plt.show()
sumDF = pd.DataFrame()
for b_idx, b_name in enumerate(b_list):
for n_idx, n_name in enumerate(net_list):
tmp_x = eval(('X_df_%d' % (n_idx+1))).copy()
tmp_corr = pg.corr(tmp_x.mean(axis=1), y_df[b_name])
tmp_corr = pd.concat([pd.DataFrame({'behav':b_list[b_idx], 'net':[net_list[n_idx]]}), tmp_corr.reset_index()], axis = 1)
sumDF = sumDF.append(tmp_corr)
sumDF.sort_values('p-val').head(5)
| behav | net | index | n | r | CI95% | p-val | BF10 | power | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | fi | Visual I | pearson | 337 | 0.266568 | [0.16, 0.36] | 6.852964e-07 | 1.46e+04 | 0.998814 |
| 0 | fi | Medial frontal | pearson | 337 | 0.213045 | [0.11, 0.31] | 8.086904e-05 | 155.968 | 0.977225 |
| 0 | dd | Medial frontal | pearson | 337 | 0.209698 | [0.11, 0.31] | 1.051257e-04 | 121.874 | 0.973530 |
| 0 | ps | Fronto Parietal | pearson | 337 | 0.207104 | [0.1, 0.31] | 1.284594e-04 | 100.965 | 0.970337 |
| 0 | dd | Visual I | pearson | 337 | 0.204123 | [0.1, 0.3] | 1.612497e-04 | 81.584 | 0.966286 |
plt.subplots(figsize=[15,6])
sns.barplot(data=sumDF, x='behav', y='r', hue='net')
plt.axhline(y=0.10, color='r', linewidth=1)
plt.axhline(y=-0.10, color='r', linewidth=1)
plt.legend(bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.show()
import bct
net_df = [];
md = [];
ge = [];
act = [];
for idx, name in enumerate(list(df_label.sn.values)):
X_df = df_target.loc[idx, '0':].values
y_df = df_label.loc[idx, 'fi']
X_mat = sp.spatial.distance.squareform(X_df)
X_mat_pos = deepcopy(X_mat)
X_mat_pos[X_mat < 0] = 0
M1, q1 = bct.community_louvain(W=X_mat, gamma=1, B='negative_asym', seed=22)
ge1 = bct.efficiency_wei(X_mat_pos, local=False)
md.append(q1)
ge.append(ge1)
act.append(y_df)
net_df = pd.DataFrame({'modularity': md,
'g_efficiency' : ge,
'behav_act': act})
pg.corr(data=net_df, x=net_df.modularity, y=net_df.behav_act)
| n | r | CI95% | p-val | BF10 | power | |
|---|---|---|---|---|---|---|
| pearson | 337 | 0.100778 | [-0.01, 0.21] | 0.064622 | 0.373 | 0.456441 |
sns.lmplot(data =net_df, x='modularity', y='behav_act')
<seaborn.axisgrid.FacetGrid at 0x14081a6d0>
pg.corr(data=net_df, x=net_df.g_efficiency, y=net_df.behav_act)
| n | r | CI95% | p-val | BF10 | power | |
|---|---|---|---|---|---|---|
| pearson | 337 | 0.190899 | [0.09, 0.29] | 0.000425 | 32.998 | 0.942597 |
sns.lmplot(data =net_df, x='g_efficiency', y='behav_act')
<seaborn.axisgrid.FacetGrid at 0x1401bf3a0>
proj_dir=os.getcwd()
kbri_dir=(proj_dir + '/kbri_data')
os.listdir(kbri_dir)
['.DS_Store', 'susAtt_behav.xlsx', 'shen_edge_info.csv', 'susAtt_behav.csv', 'sFC_edge_summary.csv']
# behav data
behav_dat = pd.read_csv('%s/susAtt_behav.csv' % (kbri_dir))
# fMRI data
fMRI_dat = pd.read_csv('%s/sFC_edge_summary.csv' % (kbri_dir))
behav_dat.shape, fMRI_dat.shape
((12, 4), (858672, 7))
behav_dat.head(3)
| sn | dprime | aprime | c | |
|---|---|---|---|---|
| 0 | 2 | 1.850727 | 0.803 | -1.635 |
| 1 | 3 | 2.504125 | 0.889 | -1.056 |
| 2 | 4 | 3.609470 | 0.971 | -0.539 |
fMRI_dat.head(3)
| sn | type | net | edge_from | edge_to | corr | corr_z | |
|---|---|---|---|---|---|---|---|
| 0 | 2 | rest | 0 | 1 | 2 | 0.087596 | 0.087821 |
| 1 | 2 | rest | 0 | 1 | 3 | 0.448782 | 0.483174 |
| 2 | 2 | rest | 2 | 1 | 4 | -0.104997 | -0.105386 |
df_target = fMRI_dat.copy()
df_label = behav_dat.copy()
df_target.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 858672 entries, 0 to 858671 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 sn 858672 non-null int64 1 type 858672 non-null object 2 net 858672 non-null int64 3 edge_from 858672 non-null int64 4 edge_to 858672 non-null int64 5 corr 858672 non-null float64 6 corr_z 858672 non-null float64 dtypes: float64(2), int64(4), object(1) memory usage: 45.9+ MB
df_label.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 12 entries, 0 to 11 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 sn 12 non-null int64 1 dprime 12 non-null float64 2 aprime 12 non-null float64 3 c 12 non-null float64 dtypes: float64(3), int64(1) memory usage: 512.0 bytes
fig, axes = plt.subplots(ncols=2, figsize=(12, 4))
msno.matrix(df=df_target, color=(0.63, 0.79, 0.95), ax=axes[0])
msno.matrix(df=df_label, color=(0.54, 0.81, 0.94), ax=axes[1])
plt.tight_layout()
plt.show()
fig, axes = plt.subplots(1, 5, figsize=(20, 4))
sns.kdeplot(data=df_target['corr'], ax=axes[0])
sns.kdeplot(data=df_target['corr_z'], ax=axes[1])
sns.histplot(data=df_label['dprime'], kde=True, ax=axes[2])
sns.histplot(data=df_label['aprime'], kde=True, ax=axes[3])
sns.histplot(data=df_label['c'], kde=True, ax=axes[4])
plt.tight_layout()
plt.show()
fig, axes = plt.subplots(1, 5, figsize=(20, 6))
sns.boxplot(data=df_target['corr'], ax=axes[0])
sns.boxplot(data=df_target['corr_z'], ax=axes[1])
sns.boxplot(data=df_label['dprime'], ax=axes[2])
sns.boxplot(data=df_label['aprime'], ax=axes[3])
sns.boxplot(data=df_label['c'], ax=axes[4])
plt.tight_layout()
plt.show()
sfc = fMRI_dat.loc[:,['sn','net','type','corr_z']]
# 컬럼에서 조건에 따른 로지컬 인덱스 생성
is_rest = (sfc['type'] == 'rest')
is_task = (sfc['type'] == 'task')
# 조건에 따라 데이터 추출
sfc_rest = sfc[is_rest]
sfc_task = sfc[is_task]
# 인덱스 열 추가
sfc_rest.insert(2, 'idx', 0)
sfc_task.insert(2, 'idx', 0)
# 인덱스 추가하기
for i in range(2,14):
sfc_rest.loc[sfc_rest["sn"] == i, "idx"] = list(range(1,sfc_rest.loc[sfc_rest["sn"] == i, "idx"].shape[0]+1))
sfc_task.loc[sfc_task["sn"] == i, "idx"] = list(range(1,sfc_task.loc[sfc_task["sn"] == i, "idx"].shape[0]+1))
# FC 데이터 포맷 변경 (long to wide format)
sfc_rest_w = sfc_rest.pivot(index='sn', columns='idx', values='corr_z')
sfc_task_w = sfc_task.pivot(index='sn', columns='idx', values='corr_z')
sfc_rest_w.shape, sfc_task_w.shape
((12, 35778), (12, 35778))
sfc_rest_w.head(3)
| idx | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ... | 35769 | 35770 | 35771 | 35772 | 35773 | 35774 | 35775 | 35776 | 35777 | 35778 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| sn | |||||||||||||||||||||
| 2 | 0.087821 | 0.483174 | -0.105386 | 0.536837 | 0.206530 | 0.409045 | 0.018217 | 0.039137 | 0.259211 | 0.052181 | ... | -0.053328 | 0.123742 | -0.240273 | -0.065108 | 0.260724 | 0.079251 | 0.089720 | 0.295782 | 0.547787 | 0.336801 |
| 3 | 0.365570 | 0.340672 | 0.451663 | 0.311039 | 0.588303 | 0.203916 | 0.210759 | 0.318436 | 0.682319 | 0.215890 | ... | 0.403547 | 0.169584 | 0.035766 | 0.056895 | 0.523675 | 0.435298 | 0.402791 | 0.288749 | 0.325626 | 0.382869 |
| 4 | 0.099487 | -0.049594 | -0.032397 | -0.141527 | 0.890119 | -0.199865 | -0.291729 | -0.092610 | 0.172806 | -0.164627 | ... | 0.070985 | -0.059494 | -0.093339 | -0.177318 | 0.315748 | 0.275152 | 0.104687 | 0.503104 | 0.244396 | 0.223115 |
3 rows × 35778 columns
# Behavioural 데이터 포맷 변경 (long to wide format)
behav_dat_w = behav_dat
behav_dat_w.head(3)
| sn | dprime | aprime | c | |
|---|---|---|---|---|
| 0 | 2 | 1.850727 | 0.803 | -1.635 |
| 1 | 3 | 2.504125 | 0.889 | -1.056 |
| 2 | 4 | 3.609470 | 0.971 | -0.539 |
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
sns.kdeplot(data=sfc_rest['corr_z'], ax=axes[0])
sns.kdeplot(data=sfc_task['corr_z'], ax=axes[1])
sns.histplot(data=behav_dat_w['dprime'], kde=True, ax=axes[2])
plt.tight_layout()
plt.show()
def outlier_check(df, method=1):
if method == 1: # Boxplot Method
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
IQR = Q3 - Q1
lower = Q1-(IQR*1.5)
upper = Q1+(IQR*1.5)
elif method == 2: # Normal Distribution Method
Mean = df.mean()
std = df.std()
lower = Mean-std*3
upper = Mean+std*3
out_mask = ((df < lower) | (df > upper))
out_info = pd.concat([pd.DataFrame(lower, columns=['lower']),
pd.DataFrame(upper, columns=['upper']),
pd.DataFrame(df.min(), columns=['min']),
pd.DataFrame(df.max(), columns=['max']),
pd.DataFrame(((df < lower) | (df > upper)).sum(), columns=['counts'])], axis=1)
return out_info, out_mask
rest_info, rest_mask = outlier_check(pd.DataFrame(sfc_rest['corr_z']), 2)
task_info, task_mask = outlier_check(pd.DataFrame(sfc_rest['corr_z']), 2)
behav_info, behav_mask = outlier_check(pd.DataFrame(behav_dat['dprime']), 2)
rest_info
| lower | upper | min | max | counts | |
|---|---|---|---|---|---|
| corr_z | -0.672815 | 0.729428 | -1.055361 | 1.655876 | 4194 |
task_info
| lower | upper | min | max | counts | |
|---|---|---|---|---|---|
| corr_z | -0.672815 | 0.729428 | -1.055361 | 1.655876 | 4194 |
behav_info
| lower | upper | min | max | counts | |
|---|---|---|---|---|---|
| dprime | 0.468664 | 4.932341 | 0.90884 | 3.60947 | 0 |
fig, axes = plt.subplots(1, 3, figsize=(12, 3))
sns.boxplot(y=sfc_rest['corr_z'], ax=axes[0])
sns.boxplot(y=sfc_task['corr_z'], ax=axes[1])
sns.boxplot(y=behav_dat_w['dprime'], ax=axes[2])
plt.tight_layout()
plt.show()
rest_np = np.array(sfc_rest_w)
task_np = np.array(sfc_task_w)
behav_np = np.array(behav_dat_w['dprime'])
rest_np.shape, task_np.shape, behav_np.shape
((12, 35778), (12, 35778), (12,))
fig, axes = plt.subplots(1, 3, figsize=[20, 6])
g1 = sns.heatmap(sp.spatial.distance.squareform(np.mean(rest_np, axis=0)),
vmax=1, vmin=-1,
square=True, cmap="vlag",
linecolor='white', ax = axes[0])
g1.set_title('rest')
g2 = sns.heatmap(sp.spatial.distance.squareform(np.mean(task_np, axis=0)),
vmax=1, vmin=-1,
square=True, cmap="vlag",
linecolor='white', ax = axes[1])
g2.set_title('task')
diff_np = rest_np - task_np
g3 = sns.heatmap(sp.spatial.distance.squareform(np.mean(diff_np, axis=0)),
vmax=1, vmin=-1,
square=True, cmap="vlag",
linecolor='white', ax = axes[2])
g3.set_title('rest-task')
plt.tight_layout()
plt.show()
subj_idx = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
rest_df = sfc_rest_w.copy()
task_df = sfc_task_w.copy()
behav_df = behav_dat_w[['sn', 'dprime']]
df_target_t = task_df.reset_index()
df_target_r = rest_df.reset_index()
df_label = behav_df
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold, mutual_info_regression, f_regression
from sklearn.feature_selection import SelectKBest, SelectPercentile
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.cross_decomposition import PLSRegression
# fs_method = ['none', 'VarThr', 'InfGain', 'f-Reg']
fs_method = ['none', 'f-Reg']
# 0 - none, 1 - variance Threshold, 2 - Information Gain, 3 - F-regression
fs_results = [];
x_fs = [];
x_tSn = [];
x_tSn_act = [];
x_tSn_r_pred = [];
x_tSn_t_pred = [];
x_nFeature_r = [];
x_nFeature_t = [];
for fs_idx, fs_name in enumerate(fs_method):
print("%d. method %s processing ----------------------------\n\n" % (fs_idx+1, fs_name))
for idx, this_sub in enumerate(subj_idx):
# subject index
train_subj = deepcopy(subj_idx)
train_subj.remove(this_sub)
test_subj = this_sub
# prepare data df_target_r.loc[df_target_r['sn'] != 2, 1:]
X_train_r = df_target_r.loc[df_target_r['sn'] != test_subj, 1:]
X_train_t = df_target_t.loc[df_target_t['sn'] != test_subj, 1:]
y_train = df_label.loc[df_label['sn'] != test_subj, 'dprime']
X_test_r = df_target_r.loc[df_target_r['sn'] == test_subj, 1:]
X_test_t = df_target_t.loc[df_target_t['sn'] == test_subj, 1:]
y_test = df_label.loc[df_label['sn'] == test_subj, 'dprime']
# feature scaling
scaler_r = StandardScaler()
targ_col_r = X_train_r.columns.values
X_train_r = scaler_r.fit_transform(X_train_r)
X_train_r = pd.DataFrame(X_train_r)
X_train_r.columns = targ_col_r
X_test_r = scaler_r.transform(X_test_r)
X_test_r = pd.DataFrame(X_test_r)
X_test_r.columns = targ_col_r
scaler_t = StandardScaler()
targ_col_t = X_train_t.columns.values
X_train_t = scaler_t.fit_transform(X_train_t)
X_train_t = pd.DataFrame(X_train_t)
X_train_t.columns = targ_col_t
X_test_t = scaler_r.transform(X_test_t)
X_test_t = pd.DataFrame(X_test_t)
X_test_t.columns = targ_col_t
# feature selection
if fs_name == 'none':
X_train_r_fs = np.array(X_train_r)
X_test_r_fs = np.array(X_test_r)
X_train_t_fs = np.array(X_train_t)
X_test_t_fs = np.array(X_test_t)
y_train_fs = np.array(y_train)
y_test_fs = np.array(y_test)
elif fs_name == 'VarThr': # Variance Threshold
Threshold = 0.2
selector_var_r=VarianceThreshold(threshold=Threshold)
selector_var_r.fit(X_train_r)
is_support_var_r = selector_var_r.variances_ > Threshold
X_train_r_fs = np.array(X_train_r[X_train_r.columns[is_support_var_r].values])
X_test_r_fs = np.array(X_test_r[X_test_r.columns[is_support_var_r].values])
selector_var_t=VarianceThreshold(threshold=Threshold)
selector_var_t.fit(X_train_t)
is_support_var_t = selector_var_t.variances_ > Threshold
X_train_t_fs = np.array(X_train_t[X_train_t.columns[is_support_var_t].values])
X_test_t_fs = np.array(X_test_t[X_test_t.columns[is_support_var_t].values])
y_train_fs = np.array(y_train)
y_test_fs = np.array(y_test)
elif fs_name == 'InfGain': # Information Gain
Threshold = 0.02
selector_ifg_r=SelectKBest(mutual_info_regression, k='all')
selector_ifg_r.fit(X_train_r, y_train)
is_support_ifg_r = selector_ifg_r.scores_ > Threshold
X_train_r_fs = np.array(X_train_r[X_train_r.columns[is_support_ifg_r].values])
X_test_r_fs = np.array(X_test_r[X_test_r.columns[is_support_ifg_r].values])
selector_ifg_t=SelectKBest(mutual_info_regression, k='all')
selector_ifg_t.fit(X_train_t, y_train)
is_support_ifg_t = selector_ifg_t.scores_ > Threshold
X_train_t_fs = np.array(X_train_t[X_train_t.columns[is_support_ifg_t].values])
X_test_t_fs = np.array(X_test_t[X_test_t.columns[is_support_ifg_t].values])
y_train_fs = np.array(y_train)
y_test_fs = np.array(y_test)
elif fs_name == 'f-Reg':
Threshold = 0.05
selector_freg_r=SelectKBest(f_regression, k='all')
selector_freg_r.fit(X_train_r, y_train)
is_support_freg_r = selector_freg_r.pvalues_ < Threshold
X_train_r_fs = np.array(X_train_r[X_train_r.columns[is_support_freg_r].values])
X_test_r_fs = np.array(X_test_r[X_test_r.columns[is_support_freg_r].values])
selector_freg_t=SelectKBest(f_regression, k='all')
selector_freg_t.fit(X_train_t, y_train)
is_support_freg_t = selector_freg_t.pvalues_ < Threshold
X_train_t_fs = np.array(X_train_t[X_train_t.columns[is_support_freg_t].values])
X_test_t_fs = np.array(X_test_t[X_test_t.columns[is_support_freg_t].values])
y_train_fs = np.array(y_train)
y_test_fs = np.array(y_test)
#model_r = Ridge()
model_r = Ridge()
model_r.fit(X_train_r_fs, y_train_fs)
y_pred_r = model_r.predict(X_test_r_fs)
model_t = Ridge()
model_t.fit(X_train_t_fs, y_train_fs)
y_pred_t = model_t.predict(X_test_t_fs)
x_fs.append(fs_name)
x_tSn.append(this_sub)
x_tSn_act.append(y_test_fs[0]);
x_tSn_r_pred.append(y_pred_r[0]);
x_tSn_t_pred.append(y_pred_t[0]);
x_nFeature_r.append(X_train_r_fs.shape[1])
x_nFeature_t.append(X_train_t_fs.shape[1])
print("%d. method %s done ----------------------------\n\n" % (fs_idx+1, fs_name))
fs_results = pd.DataFrame({'fs_method': x_fs,
'tSN': x_tSn,
'behav_act': x_tSn_act,
'behav_pred_r': x_tSn_r_pred,
'behav_pred_t': x_tSn_t_pred,
'nFeature_r': x_nFeature_r,
'nFeature_t': x_nFeature_t})
1. method none processing ---------------------------- 1. method none done ---------------------------- 2. method f-Reg processing ---------------------------- 2. method f-Reg done ----------------------------
fs_results
| fs_method | tSN | behav_act | behav_pred_r | behav_pred_t | nFeature_r | nFeature_t | |
|---|---|---|---|---|---|---|---|
| 0 | none | 2 | 1.850727 | 2.716493 | 2.837457 | 35778 | 35778 |
| 1 | none | 3 | 2.504125 | 2.852289 | 2.952566 | 35778 | 35778 |
| 2 | none | 4 | 3.609470 | 2.408284 | 2.658876 | 35778 | 35778 |
| 3 | none | 5 | 3.132076 | 2.522827 | 2.556993 | 35778 | 35778 |
| 4 | none | 6 | 3.292870 | 2.648825 | 2.821549 | 35778 | 35778 |
| 5 | none | 7 | 2.726137 | 2.806323 | 2.931099 | 35778 | 35778 |
| 6 | none | 8 | 3.236010 | 2.681428 | 2.672097 | 35778 | 35778 |
| 7 | none | 9 | 3.096713 | 2.732247 | 2.786078 | 35778 | 35778 |
| 8 | none | 10 | 3.122660 | 2.727342 | 2.589718 | 35778 | 35778 |
| 9 | none | 11 | 2.559771 | 2.854476 | 2.775192 | 35778 | 35778 |
| 10 | none | 12 | 2.366633 | 2.863013 | 3.000740 | 35778 | 35778 |
| 11 | none | 13 | 0.908840 | 2.863300 | 2.900538 | 35778 | 35778 |
| 12 | f-Reg | 2 | 1.850727 | 2.730993 | 2.892313 | 1500 | 3582 |
| 13 | f-Reg | 3 | 2.504125 | 2.805360 | 2.977662 | 1711 | 3346 |
| 14 | f-Reg | 4 | 3.609470 | 2.436427 | 2.631878 | 1830 | 3218 |
| 15 | f-Reg | 5 | 3.132076 | 2.567411 | 2.588805 | 1711 | 3253 |
| 16 | f-Reg | 6 | 3.292870 | 2.715436 | 2.828534 | 1530 | 2890 |
| 17 | f-Reg | 7 | 2.726137 | 2.854476 | 2.936176 | 1718 | 3196 |
| 18 | f-Reg | 8 | 3.236010 | 2.647498 | 2.622482 | 1627 | 3107 |
| 19 | f-Reg | 9 | 3.096713 | 2.769266 | 2.738789 | 1561 | 3000 |
| 20 | f-Reg | 10 | 3.122660 | 2.724660 | 2.553300 | 1588 | 3194 |
| 21 | f-Reg | 11 | 2.559771 | 2.810588 | 2.835763 | 1599 | 3130 |
| 22 | f-Reg | 12 | 2.366633 | 2.840279 | 3.061159 | 1660 | 3493 |
| 23 | f-Reg | 13 | 0.908840 | 2.859096 | 2.970813 | 1344 | 1323 |
fs_method = ['none', 'f-Reg']
x_fs = [];
x_corr_r = [];
x_corr_p_r = [];
x_rmse_r = [];
x_mape_r = [];
x_nf_r = [];
x_corr_t = [];
x_corr_p_t = [];
x_rmse_t = [];
x_mape_t = [];
x_nf_t = [];
for idx, name in enumerate(fs_method):
tmp_df= fs_results.loc[fs_results['fs_method'] == name, ['behav_act', 'behav_pred_r', 'behav_pred_t','nFeature_r','nFeature_t']]
tmp_corr_r = pg.corr(tmp_df.behav_act, tmp_df.behav_pred_r)
rmse_r = mean_squared_error(tmp_df['behav_act'], tmp_df['behav_pred_r'])**0.5
mape_r = mean_absolute_percentage_error(tmp_df['behav_act'], tmp_df['behav_pred_r'])
tmp_corr_t = pg.corr(tmp_df.behav_act, tmp_df.behav_pred_t)
rmse_t = mean_squared_error(tmp_df['behav_act'], tmp_df['behav_pred_t'])**0.5
mape_t = mean_absolute_percentage_error(tmp_df['behav_act'], tmp_df['behav_pred_t'])
x_fs.append(name)
x_corr_r.append(tmp_corr_r['r'][0])
x_corr_p_r.append(tmp_corr_r['p-val'][0])
x_rmse_r.append(rmse_r)
x_mape_r.append(mape_r)
x_nf_r.append(np.mean(tmp_df.nFeature_r))
x_corr_t.append(tmp_corr_t['r'][0])
x_corr_p_t.append(tmp_corr_t['p-val'][0])
x_rmse_t.append(rmse_t)
x_mape_t.append(mape_t)
x_nf_t.append(np.mean(tmp_df.nFeature_t))
fs_fnl = pd.DataFrame({'fs_method': x_fs,
'rest_r': x_corr_r[0],
'rest_p_r': x_corr_p_r[0],
'rest_rmse': x_rmse_r,
'rest_mape': x_mape_r,
'rest_nF': x_nf_r,
'task_r': x_corr_t[0],
'task_p_r': x_corr_p_t[0],
'task_rmse': x_rmse_t,
'task_mape': x_mape_t,
'task_nF': x_nf_t})
fs_fnl
| fs_method | rest_r | rest_p_r | rest_rmse | rest_mape | rest_nF | task_r | task_p_r | task_rmse | task_mape | task_nF | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | none | -0.653271 | 0.021248 | 0.809107 | 0.354182 | 35778.000000 | -0.576662 | 0.049667 | 0.805560 | 0.363862 | 35778.0 |
| 1 | f-Reg | -0.653271 | 0.021248 | 0.796509 | 0.348558 | 1614.916667 | -0.576662 | 0.049667 | 0.839576 | 0.380987 | 3061.0 |
fig, axes = plt.subplots(1,3, figsize = [13,6])
for idx, name in enumerate(fs_method):
tmp_df= fs_results.loc[fs_results['fs_method'] == name, ['behav_act', 'behav_pred_r']]
sns.regplot(x='behav_act', y='behav_pred_r', data=tmp_df, ax = axes[idx]);
axes[idx].set_title(('%s' % name))
plt.tight_layout()
plt.show()
fig, axes = plt.subplots(1,3, figsize = [13,6])
for idx, name in enumerate(fs_method):
tmp_df= fs_results.loc[fs_results['fs_method'] == name, ['behav_act', 'behav_pred_t']]
sns.regplot(x='behav_act', y='behav_pred_t', data=tmp_df, ax = axes[idx]);
axes[idx].set_title(('%s' % name))
plt.tight_layout()
plt.show()
# import module
from sklearn.cross_decomposition import PLSRegression
from sklearn.linear_model import LinearRegression, BayesianRidge, Ridge, Lasso, BayesianRidge
from sklearn.svm import LinearSVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import KFold, LeaveOneOut, GridSearchCV
from sklearn.model_selection import cross_validate, cross_val_score, cross_val_predict
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
def optimise_pls_cv(X, y, n_comp):
# Define PLS object
pls = PLSRegression(n_components=n_comp)
# Cross-validation
y_cv = cross_val_predict(pls, X, y, cv=12)
# Calculate scores
r2 = r2_score(y, y_cv)
mse = mean_squared_error(y, y_cv)
rpd = y.std()/np.sqrt(mse)
return (y_cv, r2, mse, rpd)
def pick_nc(vals1, vals2, vals3, ylabel1, ylabel2, ylabel3, objective1, objective2, objective3):
if objective1 == 'min':
idx = np.argmin(vals1)
else:
idx = np.argmax(vals1)
if objective2 == 'min':
idx = np.argmin(vals2)
else:
idx = np.argmax(vals2)
if objective3=='min':
idx = np.argmin(vals3)
else:
idx = np.argmax(vals3)
nc = xticks[idx]
return nc
# Plot the mses
def plot_metrics(vals1, vals2, vals3, ylabel1, ylabel2, ylabel3, objective1, objective2, objective3):
with plt.style.context('ggplot'):
plt.subplot(1,3,1)
plt.plot(xticks, np.array(vals1), '-v', color='blue', mfc='blue')
if objective1=='min':
idx = np.argmin(vals1)
else:
idx = np.argmax(vals1)
plt.plot(xticks[idx], np.array(vals1)[idx], 'P', ms=10, mfc='red')
plt.xlabel('Number of PLS components')
plt.xticks = xticks
plt.ylabel(ylabel1)
plt.title('PLS - nComp %d' % xticks[idx])
plt.subplot(1,3,2)
plt.plot(xticks, np.array(vals2), '-v', color='blue', mfc='blue')
if objective2=='min':
idx = np.argmin(vals2)
else:
idx = np.argmax(vals2)
plt.plot(xticks[idx], np.array(vals2)[idx], 'P', ms=10, mfc='red')
plt.xlabel('Number of PLS components')
plt.xticks = xticks
plt.ylabel(ylabel2)
plt.title('PLS - nComp %d' % xticks[idx])
plt.subplot(1,3,3)
plt.plot(xticks, np.array(vals3), '-v', color='blue', mfc='blue')
if objective3=='min':
idx = np.argmin(vals3)
else:
idx = np.argmax(vals3)
plt.plot(xticks[idx], np.array(vals3)[idx], 'P', ms=10, mfc='red')
plt.xlabel('Number of PLS components')
plt.xticks = xticks
plt.ylabel(ylabel3)
plt.title('PLS - Comp %d' % xticks[idx])
nc = xticks[idx]
return nc
plt.show()
x_targ = np.array(df_target_r.loc[:, 1:])
y_targ = np.array(df_label.loc[:,'dprime'])
# test with 20 components
r2s_r = []; mses_r = []; rpds_r = [];
xticks = np.arange(1, 20)
for n_comp in xticks:
y_cv, r2, mse, rpd = optimise_pls_cv(x_targ, y_targ, n_comp)
r2s_r.append(r2)
mses_r.append(mse)
rpds_r.append(rpd)
plt.figure(figsize=(15,5))
nc=plot_metrics(mses_r, rpds_r, r2s_r, 'MSE', 'RPD', 'R2', 'min', 'max', 'max')
plt.tight_layout()
plt.show()
print("\nRest-sFC", "/", "ncomp:", nc)
nc_r = nc
Rest-sFC / ncomp: 5
Pls = PLSRegression(n_components=nc_r)
Ridge = Ridge()
Lasso = Lasso()
SVR = LinearSVR()
# Linear = LinearRegression()
# bRdige = BayesianRidge()
# Tree = DecisionTreeRegressor()
Model_list = [Pls, Ridge, Lasso, SVR]
def simple_fit(model, X_train, y_train):
cv = KFold(n_splits = 10, random_state = 0, shuffle=True)
y_pred = cross_val_predict(model, X_train, y_train, cv=cv)
train_mse = mean_squared_error(y_train, y_pred)
train_rmse = mean_squared_error(y_train, y_pred)**0.5
train_mape = mean_absolute_percentage_error(y_train, y_pred)
train_r2 = r2_score(y_train, y_pred)
return y_pred.flatten(), train_mse, train_rmse, train_mape, train_r2
ms_df = pd.DataFrame(columns=[
'Model', 'MSE', 'RMSE', 'MAPE', 'R2', 'r', 'r_p'])
x_targ = df_target_r.loc[:, 1:]
y_targ = np.array(df_label.loc[:,'dprime'])
i = 0
for model in Model_list:
i += 1
print(model)
y_pred, train_mse, train_rmse, train_mape, train_r2 = simple_fit(model, x_targ, y_targ)
tmp_corr = pg.corr(y_targ, y_pred)
ms_tmp = {'Model': str(model),
'MSE': train_mse,
'RMSE': train_rmse,
'MAPE': train_mape ,
'R2': train_r2 ,
'r': tmp_corr['r'][0],
'r_p': tmp_corr['p-val'][0]}
ms_df = ms_df.append(ms_tmp, ignore_index=True)
PLSRegression(n_components=5) Ridge() Lasso() LinearSVR()
ms_df
| Model | MSE | RMSE | MAPE | R2 | r | r_p | |
|---|---|---|---|---|---|---|---|
| 0 | PLSRegression(n_components=5) | 0.628639 | 0.792868 | 0.346103 | -0.239101 | -0.560069 | 0.058245 |
| 1 | Ridge() | 0.632259 | 0.795147 | 0.346164 | -0.246237 | -0.575734 | 0.050122 |
| 2 | Lasso() | 0.571209 | 0.755784 | 0.326911 | -0.125903 | -0.652141 | 0.021549 |
| 3 | LinearSVR() | 0.556724 | 0.746140 | 0.290100 | -0.097351 | 0.147619 | 0.647070 |
fig, axes = plt.subplots(4,1, figsize = [12,8])
sns.barplot(data=ms_df, y='Model', x ='r', ax = axes[0])
sns.barplot(data=ms_df, y='Model', x ='RMSE', ax = axes[1])
sns.barplot(data=ms_df, y='Model', x ='MAPE', ax = axes[2])
sns.barplot(data=ms_df, y='Model', x ='R2', ax = axes[3])
plt.tight_layout()
plt.show()
b_type = ['dprime']
fs_method = ['f-Reg']
Pls = PLSRegression(n_components=nc_r)
SVR = LinearSVR()
Model_list = [Pls, SVR]
fs_results = [];
x_model = [];
x_model_comp = [];
x_btype = [];
x_fs = [];
x_tSn = [];
x_tSn_act = [];
x_tSn_r_pred = [];
x_nFeature = [];
for m_idx , m_name in enumerate(Model_list):
for b_idx , b_name in enumerate(b_type):
for fs_idx, fs_name in enumerate(fs_method):
print("%d. %s, %s, %s processing ----------------------------\n\n" % (fs_idx+1, m_name, b_name, fs_name))
for idx, this_sub in enumerate(subj_idx):
# subject index
train_subj = deepcopy(subj_idx)
train_subj.remove(this_sub)
test_subj = this_sub
# prepare data
X_train_r = df_target_r.loc[df_target_r['sn'] != test_subj, 1:]
y_train = df_label.loc[df_label['sn'] != test_subj, 'dprime']
X_test_r = df_target_r.loc[df_target_r['sn'] == test_subj, 1:]
y_test = df_label.loc[df_label['sn'] == test_subj, 'dprime']
# feature scaling
scaler = StandardScaler()
targ_col = X_train_r.columns.values
X_train_r = scaler.fit_transform(X_train_r)
X_train_r = pd.DataFrame(X_train_r)
X_train_r.columns = targ_col
X_test_r = scaler.transform(X_test_r)
X_test_r = pd.DataFrame(X_test_r)
X_test_r.columns = targ_col
# feature selection
if fs_name == 'none':
X_train_r_fs = np.array(X_train_r)
X_test_r_fs = np.array(X_test_r)
y_train_fs = np.array(y_train)
y_test_fs = np.array(y_test)
elif fs_name == 'f-Reg':
Threshold = 0.05
selector_freg=SelectKBest(f_regression, k='all')
selector_freg.fit(X_train_r, y_train)
is_support_freg = selector_freg.pvalues_ < Threshold
X_train_r_fs = np.array(X_train_r[X_train_r.columns[is_support_freg].values])
X_test_r_fs = np.array(X_test_r[X_test_r.columns[is_support_freg].values])
y_train_fs = np.array(y_train)
y_test_fs = np.array(y_test)
# PLSR optimization
r2s_r = []; mses_r = []; rpds_r = [];
xticks = np.arange(1, 10)
for n_comp in xticks:
y_cv, r2, mse, rpd = optimise_pls_cv(x_targ, y_targ, n_comp)
r2s_r.append(r2)
mses_r.append(mse)
rpds_r.append(rpd)
nc = pick_nc(mses_r, rpds_r, r2s_r, 'MSE', 'RPD', 'R2', 'min', 'max', 'max')
# Model Fitting & Test
# model_r = PLSRegression(n_components=nc)
model_r = m_name
model_r.fit(X_train_r_fs, y_train_fs)
y_pred_r = model_r.predict(X_test_r_fs)
# saving results
x_model.append(m_name)
x_model_comp.append(nc)
x_btype.append(b_name)
x_fs.append(fs_name)
x_tSn.append(this_sub)
x_tSn_act.append(y_test_fs[0]);
x_tSn_r_pred.append(y_pred_r[0]);
x_nFeature.append(X_train_r_fs.shape[1])
print("%d. %s, %s, %s done ----------------------------\n\n" % (fs_idx+1, m_name, b_name, fs_name))
fs_results = pd.DataFrame({'model': x_model,
'mComp': x_model_comp,
'ability': x_btype,
'fs': x_fs,
'nFeature': x_nFeature,
'tSN': x_tSn,
'behav_act': x_tSn_act,
'behav_pred': x_tSn_r_pred})
1. PLSRegression(n_components=5), dprime, f-Reg processing ---------------------------- 1. PLSRegression(n_components=5), dprime, f-Reg done ---------------------------- 1. LinearSVR(), dprime, f-Reg processing ---------------------------- 1. LinearSVR(), dprime, f-Reg done ----------------------------
fs_results
| model | mComp | ability | fs | nFeature | tSN | behav_act | behav_pred | |
|---|---|---|---|---|---|---|---|---|
| 0 | PLSRegression(n_components=5) | 5 | dprime | f-Reg | 1500 | 2 | 1.850727 | [2.7309806312445595] |
| 1 | PLSRegression(n_components=5) | 5 | dprime | f-Reg | 1711 | 3 | 2.504125 | [2.8053584457799885] |
| 2 | PLSRegression(n_components=5) | 5 | dprime | f-Reg | 1830 | 4 | 3.609470 | [2.4364219634664734] |
| 3 | PLSRegression(n_components=5) | 5 | dprime | f-Reg | 1711 | 5 | 3.132076 | [2.567438155339094] |
| 4 | PLSRegression(n_components=5) | 5 | dprime | f-Reg | 1530 | 6 | 3.292870 | [2.715447530804656] |
| 5 | PLSRegression(n_components=5) | 5 | dprime | f-Reg | 1718 | 7 | 2.726137 | [2.8544815741195495] |
| 6 | PLSRegression(n_components=5) | 5 | dprime | f-Reg | 1627 | 8 | 3.236010 | [2.6475384080560858] |
| 7 | PLSRegression(n_components=5) | 5 | dprime | f-Reg | 1561 | 9 | 3.096713 | [2.7692586088286593] |
| 8 | PLSRegression(n_components=5) | 5 | dprime | f-Reg | 1588 | 10 | 3.122660 | [2.724660805786655] |
| 9 | PLSRegression(n_components=5) | 5 | dprime | f-Reg | 1599 | 11 | 2.559771 | [2.810584657598049] |
| 10 | PLSRegression(n_components=5) | 5 | dprime | f-Reg | 1660 | 12 | 2.366633 | [2.8402911175311654] |
| 11 | PLSRegression(n_components=5) | 5 | dprime | f-Reg | 1344 | 13 | 0.908840 | [2.859097938324951] |
| 12 | LinearSVR() | 5 | dprime | f-Reg | 1500 | 2 | 1.850727 | 2.730603 |
| 13 | LinearSVR() | 5 | dprime | f-Reg | 1711 | 3 | 2.504125 | 2.805019 |
| 14 | LinearSVR() | 5 | dprime | f-Reg | 1830 | 4 | 3.609470 | 2.435819 |
| 15 | LinearSVR() | 5 | dprime | f-Reg | 1711 | 5 | 3.132076 | 2.566718 |
| 16 | LinearSVR() | 5 | dprime | f-Reg | 1530 | 6 | 3.292870 | 2.715132 |
| 17 | LinearSVR() | 5 | dprime | f-Reg | 1718 | 7 | 2.726137 | 2.853996 |
| 18 | LinearSVR() | 5 | dprime | f-Reg | 1627 | 8 | 3.236010 | 2.647228 |
| 19 | LinearSVR() | 5 | dprime | f-Reg | 1561 | 9 | 3.096713 | 2.768777 |
| 20 | LinearSVR() | 5 | dprime | f-Reg | 1588 | 10 | 3.122660 | 2.724315 |
| 21 | LinearSVR() | 5 | dprime | f-Reg | 1599 | 11 | 2.559771 | 2.810391 |
| 22 | LinearSVR() | 5 | dprime | f-Reg | 1660 | 12 | 2.366633 | 2.839932 |
| 23 | LinearSVR() | 5 | dprime | f-Reg | 1344 | 13 | 0.908840 | 2.858604 |
tmp_df_1 = fs_results.loc[fs_results['model'] == fs_results.model[0],
['behav_act', 'behav_pred']]
tmp_df_1.behav_pred = np.concatenate(tmp_df_1.behav_pred.values)
tmp_df_2 = fs_results.loc[fs_results['model'] == fs_results.model[12],
['behav_act', 'behav_pred']]
tmp_df_2.behav_pred = tmp_df_2.behav_pred.astype(float)
tmp_corr_r = pg.corr(tmp_df_1.behav_act, tmp_df_1.behav_pred)
rmse_r = mean_squared_error(tmp_df_1.behav_act, tmp_df_1.behav_pred)**0.5
mape_r = mean_absolute_percentage_error(tmp_df_1.behav_act, tmp_df_1.behav_pred)
print(rmse_r, mape_r)
0.7965044427366488 0.3485560466307322
tmp_corr_r
| n | r | CI95% | p-val | BF10 | power | |
|---|---|---|---|---|---|---|
| pearson | 12 | -0.642623 | [-0.89, -0.11] | 0.024217 | 3.459 | 0.657718 |
tmp_corr_r = pg.corr(tmp_df_2.behav_act, tmp_df_2.behav_pred)
rmse_r = mean_squared_error(tmp_df_2.behav_act, tmp_df_2.behav_pred)**0.5
mape_r = mean_absolute_percentage_error(tmp_df_2.behav_act, tmp_df_2.behav_pred)
print(rmse_r, mape_r)
0.7965143753732008 0.34851986321714223
tmp_corr_r
| n | r | CI95% | p-val | BF10 | power | |
|---|---|---|---|---|---|---|
| pearson | 12 | -0.642387 | [-0.89, -0.11] | 0.024286 | 3.451 | 0.657264 |
edge_dat = pd.read_csv('%s/shen_edge_info.csv' % (kbri_dir))
net_list = ['Medial frontal', 'Fronto Parietal',
'Default Mode', 'Subcortical-cereb',
'Motor', 'Visual I', 'Visual II',
'Visual Association']
df_target_t = task_df.reset_index()
df_target_r = rest_df.reset_index()
df_label = behav_df
X_df = df_target_r.loc[:, 1:]
y_df = df_label.loc[:, 'dprime']
X_df = pd.concat([pd.DataFrame(edge_dat['net']),
X_df.T.reset_index(drop=True)], axis=1).T
X_df_1 = X_df.loc[0:, X_df.loc['net',:] == 1]
X_df_2 = X_df.loc[0:, X_df.loc['net',:] == 2]
X_df_3 = X_df.loc[0:, X_df.loc['net',:] == 3]
X_df_4 = X_df.loc[0:, X_df.loc['net',:] == 4]
X_df_5 = X_df.loc[0:, X_df.loc['net',:] == 5]
X_df_6 = X_df.loc[0:, X_df.loc['net',:] == 6]
X_df_7 = X_df.loc[0:, X_df.loc['net',:] == 7]
X_df_8 = X_df.loc[0:, X_df.loc['net',:] == 8]
res = pd.concat([pg.corr(X_df_1.mean(axis=1), y_df, method='skipped'), pg.corr(X_df_2.mean(axis=1), y_df, method='skipped'),
pg.corr(X_df_3.mean(axis=1), y_df, method='skipped'), pg.corr(X_df_4.mean(axis=1), y_df, method='skipped'),
pg.corr(X_df_5.mean(axis=1), y_df, method='skipped'), pg.corr(X_df_6.mean(axis=1), y_df, method='skipped'),
pg.corr(X_df_7.mean(axis=1), y_df, method='skipped'), pg.corr(X_df_8.mean(axis=1), y_df, method='skipped')]).T
res.columns = [net_list[0], net_list[1], net_list[2], net_list[3],
net_list[4], net_list[5], net_list[6], net_list[7]]
res.T
| n | outliers | r | CI95% | p-val | power | |
|---|---|---|---|---|---|---|
| Medial frontal | 12 | 1 | -0.009091 | [-0.61, 0.59] | 0.978837 | 0.048958 |
| Fronto Parietal | 12 | 1 | 0.127273 | [-0.51, 0.68] | 0.709215 | 0.065344 |
| Default Mode | 12 | 1 | -0.236364 | [-0.73, 0.42] | 0.484091 | 0.108402 |
| Subcortical-cereb | 12 | 1 | 0.181818 | [-0.47, 0.7] | 0.592615 | 0.083179 |
| Motor | 12 | 1 | -0.109091 | [-0.67, 0.53] | 0.749509 | 0.06091 |
| Visual I | 12 | 1 | -0.163636 | [-0.7, 0.48] | 0.630685 | 0.076453 |
| Visual II | 12 | 1 | -0.018182 | [-0.61, 0.59] | 0.957685 | 0.049205 |
| Visual Association | 12 | 1 | 0.672727 | [0.12, 0.91] | 0.023313 | 0.667365 |
ax_list = [];
for i in range(2):
for j in range(4):
ax_list.append([int(i),int(j)])
fig, axes = plt.subplots(2,4, figsize = [12,6])
for idx, name in enumerate(net_list):
tmp_x = eval(('X_df_%d' % (idx+1))).copy()
tmp_df = pd.concat([tmp_x.mean(axis=1), y_df], axis = 1)
tmp_df.columns = ['sFC', 'susAtt']
g=sns.regplot(x='sFC', y='susAtt', data=tmp_df, ax = axes[ax_list[idx][0], ax_list[idx][1]]);
axes[ax_list[idx][0], ax_list[idx][1]].set_title(('%s' % name))
plt.tight_layout()
plt.show()
plt.subplots(figsize=[15,6])
sns.barplot(data=res.T.reset_index(), x='index', y='r')
plt.tight_layout()
plt.show()
X_df = df_target_t.loc[:, 1:]
y_df = df_label.loc[:, 'dprime']
X_df = pd.concat([pd.DataFrame(edge_dat['net']),
X_df.T.reset_index(drop=True)], axis=1).T
X_df_1 = X_df.loc[0:, X_df.loc['net',:] == 1]
X_df_2 = X_df.loc[0:, X_df.loc['net',:] == 2]
X_df_3 = X_df.loc[0:, X_df.loc['net',:] == 3]
X_df_4 = X_df.loc[0:, X_df.loc['net',:] == 4]
X_df_5 = X_df.loc[0:, X_df.loc['net',:] == 5]
X_df_6 = X_df.loc[0:, X_df.loc['net',:] == 6]
X_df_7 = X_df.loc[0:, X_df.loc['net',:] == 7]
X_df_8 = X_df.loc[0:, X_df.loc['net',:] == 8]
res = pd.concat([pg.corr(X_df_1.mean(axis=1), y_df, method='skipped'), pg.corr(X_df_2.mean(axis=1), y_df, method='skipped'),
pg.corr(X_df_3.mean(axis=1), y_df, method='skipped'), pg.corr(X_df_4.mean(axis=1), y_df, method='skipped'),
pg.corr(X_df_5.mean(axis=1), y_df, method='skipped'), pg.corr(X_df_6.mean(axis=1), y_df, method='skipped'),
pg.corr(X_df_7.mean(axis=1), y_df, method='skipped'), pg.corr(X_df_8.mean(axis=1), y_df, method='skipped')]).T
res.columns = [net_list[0], net_list[1], net_list[2], net_list[3],
net_list[4], net_list[5], net_list[6], net_list[7]]
res.T
| n | outliers | r | CI95% | p-val | power | |
|---|---|---|---|---|---|---|
| Medial frontal | 12 | 1 | -0.036364 | [-0.62, 0.58] | 0.915468 | 0.050195 |
| Fronto Parietal | 12 | 1 | 0.136364 | [-0.5, 0.68] | 0.689309 | 0.067836 |
| Default Mode | 12 | 1 | 0.1 | [-0.53, 0.66] | 0.769875 | 0.058964 |
| Subcortical-cereb | 12 | 1 | 0.336364 | [-0.33, 0.78] | 0.311824 | 0.176936 |
| Motor | 12 | 1 | -0.081818 | [-0.65, 0.54] | 0.81099 | 0.055601 |
| Visual I | 12 | 1 | -0.372727 | [-0.79, 0.29] | 0.258926 | 0.210019 |
| Visual II | 12 | 1 | 0.2 | [-0.45, 0.71] | 0.555445 | 0.090724 |
| Visual Association | 12 | 1 | 0.354545 | [-0.31, 0.79] | 0.284693 | 0.192892 |
ax_list = [];
for i in range(2):
for j in range(4):
ax_list.append([int(i),int(j)])
fig, axes = plt.subplots(2,4, figsize = [12,6])
for idx, name in enumerate(net_list):
tmp_x = eval(('X_df_%d' % (idx+1))).copy()
tmp_df = pd.concat([tmp_x.mean(axis=1), y_df], axis = 1)
tmp_df.columns = ['sFC', 'susAtt']
g=sns.regplot(x='sFC', y='susAtt', data=tmp_df, ax = axes[ax_list[idx][0], ax_list[idx][1]]);
axes[ax_list[idx][0], ax_list[idx][1]].set_title(('%s' % name))
plt.tight_layout()
plt.show()
plt.subplots(figsize=[15,6])
sns.barplot(data=res.T.reset_index(), x='index', y='r')
plt.tight_layout()
plt.show()